STATS 506 PS 5

Author

Jingyan Zhang

link to my GitHub repo: https://github.com/Menako2013529/STATS-506-Problem-Set

Show the code
library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.3.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Show the code
library(Rcpp)
library(dplyr)

Problem 1

a.

I referred to ChatGPT for some help.

Show the code
# Define the rational class
setClass(
  "rational",
  slots = c(
    numerator = "numeric",
    denominator = "numeric"
  ),
  validity = function(object) {# 2. Validator
    if (object@denominator == 0) {
      return("Denominator cannot be zero.")
    }
    TRUE
  }
)
Show the code
# Constructor
rational <- function(numerator, denominator) {
  if (denominator == 0) stop("Denominator cannot be zero.")
  new("rational", numerator = numerator, denominator = denominator)
}

Helper function:

Show the code
# Helper: GCD and LCM using Rcpp
Rcpp::cppFunction('
  int gcd(int a, int b) {
    if (b == 0) return abs(a);
    return gcd(b, a % b);
  }
')
Show the code
# Simplify method
setGeneric("simplify", function(object) standardGeneric("simplify"))
Creating a new generic function for 'simplify' in the global environment
[1] "simplify"
Show the code
setMethod("simplify", "rational", function(object) {
  g <- gcd(object@numerator, object@denominator)
  object@numerator <- object@numerator / g
  object@denominator <- object@denominator / g
  object
})
Show the code
# Show method
setMethod("show", "rational", function(object) {
  cat(sprintf("%d/%d\n", object@numerator, object@denominator))
})
Show the code
# Quotient method
setGeneric("quotient", function(object, digits = 7) standardGeneric("quotient"))
[1] "quotient"
Show the code
setMethod("quotient", "rational", function(object, digits = 7) {
  if (!is.numeric(digits) || digits%%1!=0) stop("digits need to be an integer")
  result <- object@numerator / object@denominator
  print(round(result, digits))
  return(round(result, digits))
})
Show the code
# Arithmetic operations
setMethod("+", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
  num <- e1@numerator * e2@denominator + e2@numerator * e1@denominator
  den <- e1@denominator * e2@denominator
  simplify(rational(num, den))
})

setMethod("-", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
  num <- e1@numerator * e2@denominator - e2@numerator * e1@denominator
  den <- e1@denominator * e2@denominator
  simplify(rational(num, den))
})

setMethod("*", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
  num <- e1@numerator * e2@numerator
  den <- e1@denominator * e2@denominator
  simplify(rational(num, den))
})

setMethod("/", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
  if (e2@numerator == 0) stop("Division by zero.")
  num <- e1@numerator * e2@denominator
  den <- e1@denominator * e2@numerator
  simplify(rational(num, den))
})

b.

Show the code
r1 <- rational(24,6)
r2 <- rational(7,230)
r3 <- rational(0,4)
Show the code
r1
24/6
Show the code
r3
0/4
Show the code
r1 + r2
927/230
Show the code
r1 - r2
913/230
Show the code
r1 * r2
14/115
Show the code
r1 / r2
920/7
Show the code
r1 + r3
4/1
Show the code
r1 * r3
0/1
Show the code
r2 / r3
Error in r2/r3: Division by zero.
Show the code
quotient(r1)
[1] 4
[1] 4
Show the code
quotient(r2)
[1] 0.0304348
[1] 0.0304348
Show the code
quotient(r2, digits = 3)
[1] 0.03
[1] 0.03
Show the code
quotient(r2, digits = 3.14)
Error in quotient(r2, digits = 3.14): digits need to be an integer
Show the code
quotient(r2, digits = "avocado")
Error in quotient(r2, digits = "avocado"): digits need to be an integer
Show the code
q2 <- quotient(r2, digits = 3)
[1] 0.03
Show the code
q2
[1] 0.03
Show the code
quotient(r3)
[1] 0
[1] 0
Show the code
simplify(r1)
4/1
Show the code
simplify(r2)
7/230
Show the code
simplify(r3)
0/1

c.

Show the code
test <- rational(1,0)
Error in rational(1, 0): Denominator cannot be zero.

Problem 2

Show the code
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout

a.

Show the code
sales_data <- read_csv("df_for_ml_improved_new_market.csv")
Rows: 4347 Columns: 112
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (1): eventdate
dbl (111): id, case_id, year, height, width, size_inchsqr, price_usd, meanpr...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show the code
# Copy from last hw
genre_data <- sales_data %>%
  pivot_longer(
    cols = starts_with("Genre___"), 
    names_to = "genre", 
    values_to = "count"
  ) %>%
  filter(count == 1) %>%
  mutate(genre = str_replace_all(genre, "Genre___", "")) %>%
  group_by(year, genre) %>%
  summarize(total_sales = n(), .groups = 'drop')


plot <- genre_data %>%
  plot_ly(
    x = ~year,
    y = ~total_sales,
    color = ~genre,
    type = 'scatter',
    mode = 'lines+markers',
    fill = 'tonexty',
    text = ~paste("Genre:", genre, "<br>Total Sales:", total_sales, "<br>Year:", year)
  ) %>%
  layout(
    title = "Distribution of Genre Sales Across Years",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Total Sales"),
    legend = list(title = list(text = "Genre")),
    hovermode = "x unified"
  )
plot

b.

I referred to ChatGPT.

Show the code
genre_price_data <- sales_data %>%
  pivot_longer(
    cols = starts_with("Genre___"), 
    names_to = "genre", 
    values_to = "count"
  ) %>%
  filter(count == 1) %>%
  mutate(genre = str_replace_all(genre, "Genre___", "")) %>%
  group_by(year, genre) %>%
  summarize(
    avg_price = mean(price_usd, na.rm = TRUE),
    .groups = 'drop'
  )

# Compute overall trend
overall_price_data <- sales_data %>%
  group_by(year) %>%
  summarize(
    avg_price = mean(price_usd, na.rm = TRUE),
    genre = "Overall",
    .groups = 'drop'
  )

# Combine overall and genre-specific data
combined_data <- bind_rows(genre_price_data, overall_price_data)

plot <- combined_data %>%
  plot_ly(
    x = ~year,
    y = ~avg_price,
    color = ~genre,
    type = 'scatter',
    mode = 'lines+markers',
    text = ~paste("Genre:", genre, "<br>Year:", year, "<br>Avg Price (USD):", round(avg_price, 2)),
    hoverinfo = "text"
  ) %>%
  layout(
    title = "Change in Sales Price Over Time (Overall and By Genre)",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Average Sales Price (USD)"),
    updatemenus = list(
      list(
        type = "dropdown",
        active = 0,
        buttons = list(
          list(label = "Overall",
               method = "update",
               args = list(list(visible = c(TRUE, rep(FALSE, n_distinct(combined_data$genre) - 1))),
                           list(title = "Change in Sales Price Over Time (Overall)"))),
          list(label = "By Genre",
               method = "update",
               args = list(list(visible = c(FALSE, rep(TRUE, n_distinct(combined_data$genre) - 1))),
                           list(title = "Change in Sales Price Over Time (By Genre)")))
        )
      )
    )
  )


plot

Problem 3

Show the code
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
Show the code
library(nycflights13)
Warning: package 'nycflights13' was built under R version 4.3.3

a.

I referred to ChatGPT.

Show the code
flights_dt <- as.data.table(nycflights13::flights)
airports_dt <- as.data.table(nycflights13::airports)

# Compute departure delays per airport
departure_delays <- flights_dt[
  , .(mean_dep_delay = mean(dep_delay, na.rm = TRUE),
      median_dep_delay = median(dep_delay, na.rm = TRUE),
      flight_count = .N),
  by = .(origin)
][flight_count >= 10]  # Filter airports with at least 10 flights

# Join with airports data for airport names
departure_delays <- merge(
  departure_delays,
  airports_dt[, .(faa, name)],
  by.x = "origin",
  by.y = "faa",
  all.x = TRUE
)[order(-mean_dep_delay)]  # Order by descending mean delay

departure_delays <- departure_delays[, .(airport_name = name, mean_dep_delay, median_dep_delay)]

# Compute arrival delays per airport
arrival_delays <- flights_dt[
  , .(mean_arr_delay = mean(arr_delay, na.rm = TRUE),
      median_arr_delay = median(arr_delay, na.rm = TRUE),
      flight_count = .N),
  by = .(dest)
][flight_count >= 10]  # Filter airports with at least 10 flights

# Join with airports data for airport names
arrival_delays <- merge(
  arrival_delays,
  airports_dt[, .(faa, name)],
  by.x = "dest",
  by.y = "faa",
  all.x = TRUE
)[order(-mean_arr_delay)]  # Order by descending mean delay
arrival_delays <- arrival_delays[, .(airport_name = name, mean_arr_delay, median_arr_delay)]
Show the code
# Print the tables
cat("Departure Delays per Airport:\n")
Departure Delays per Airport:
Show the code
print(departure_delays)
          airport_name mean_dep_delay median_dep_delay
1: Newark Liberty Intl       15.10795               -1
2: John F Kennedy Intl       12.11216               -1
3:          La Guardia       10.34688               -3
Show the code
cat("\nArrival Delays per Airport:\n")

Arrival Delays per Airport:
Show the code
print(arrival_delays)
                  airport_name mean_arr_delay median_arr_delay
  1:     Columbia Metropolitan      41.764151             28.0
  2:                Tulsa Intl      33.659864             14.0
  3:         Will Rogers World      30.619048             16.0
  4:      Jackson Hole Airport      28.095238             15.0
  5:             Mc Ghee Tyson      24.069204              2.0
 ---                                                          
 98:       Seattle Tacoma Intl      -1.099099            -11.0
 99:             Honolulu Intl      -1.365193             -7.0
100:                      <NA>      -3.835907             -9.0
101: John Wayne Arpt Orange Co      -7.868227            -11.0
102:         Palm Springs Intl     -12.722222            -13.5

b.

I referred to ChatGPT.

Show the code
planes_dt <- as.data.table(nycflights13::planes)

# Compute the fastest aircraft model using data.table
fastest_aircraft <- merge(flights_dt, planes_dt, by = "tailnum", all.x = TRUE)[
  !is.na(air_time) & air_time > 0,  # Exclude missing or zero air_time
  .(time = air_time / 60, 
    mph = distance / (air_time / 60), 
    model)
][
  !is.na(model),  # Exclude rows with missing model
  .(avgmph = mean(mph, na.rm = TRUE), nflights = .N), 
  by = model
][order(-avgmph)][1]  # Order by descending avgmph and take the top row

# Print the result
print(fastest_aircraft)
     model   avgmph nflights
1: 777-222 482.6254        4